from IPython.core.display import Image
Image(filename='files/conway.png')
from IPython.core.display import Image
Image(filename='files/GA.png')
Image(filename='files/luna.jpg')
import os
import numpy as np
from matplotlib.patches import Polygon
import matplotlib.pyplot as plt
import pysal
import pylab as pl
import shapely
import shapely.geometry
from shapely.geometry import shape
from shapely.geometry import Point, LineString, Polygon, MultiPolygon
from shapely.wkt import dumps, loads
#from shapely.wkb import dumps, loads
import random
def get_random_color():
r = lambda: random.randint(0,255)
return('#%02X%02X%02X' % (r(),r(),r()))
def plot(shapelyGeometries, color_dict={"fill":"#AADDCC", "line":"#666666","hole_fill":"#ffffff", "hole_line":"#999999" }):
'Plot shapelyGeometries'
figure = pl.figure(num=None, figsize=(10, 10), dpi=180)
axes = pl.axes()
axes.set_aspect('equal', 'datalim')
axes.xaxis.set_visible(False)
axes.yaxis.set_visible(False)
draw(shapelyGeometries, color_dict)
def draw(gs, color_dict):
'Draw shapelyGeometries'
# Handle single and multiple geometries
try:
gs = iter(gs)
except TypeError:
gs = [gs]
# For each shapelyGeometry,
for g in gs:
gType = g.geom_type
if gType.startswith('Multi') or gType == 'GeometryCollection':
draw(g.geoms, color_dict)
else:
draw_(g, color_dict)
def draw_(g, color_dict):
'Draw a shapelyGeometry; thanks to Sean Gilles'
gType = g.geom_type
if gType == 'Point':
pl.plot(g.x, g.y, 'k,')
elif gType == 'LineString':
x, y = g.xy
pl.plot(x, y, 'b-')
elif gType == 'Polygon':
#can draw parts as multiple colors
if not color_dict:
color_dict={"fill":get_random_color(),
"line":"#666666",
"hole_fill":"#FFFFFF",
"hole_line":"#999999" }
x, y = g.exterior.xy
pl.fill(x, y, color=color_dict["fill"], aa=True)
pl.plot(x, y, color=color_dict["line"], aa=True, lw=1.0)
for hole in g.interiors:
x, y = hole.xy
pl.fill(x, y, color=color_dict["hole_fill"], aa=True)
pl.plot(x, y, color=color_dict["hole_line"], aa=True, lw=1.0)
pt = Point(0, 0)
print pt
plot(pt)
POINT (0 0)
circle = pt.buffer(.5)
plot([circle, pt])
print circle
POLYGON ((0.5 0, 0.4975923633360985 -0.04900857016478025, 0.4903926402016152 -0.09754516100806404, 0.4784701678661045 -0.1451423386272311, 0.4619397662556435 -0.1913417161825447, 0.4409606321741776 -0.2356983684129986, 0.4157348061512728 -0.2777851165098009, 0.3865052266813687 -0.3171966420818225, 0.3535533905932741 -0.3535533905932735, 0.3171966420818231 -0.3865052266813682, 0.2777851165098016 -0.4157348061512723, 0.2356983684129993 -0.4409606321741772, 0.1913417161825454 -0.4619397662556431, 0.1451423386272318 -0.4784701678661042, 0.09754516100806482 -0.4903926402016151, 0.04900857016478104 -0.4975923633360984, 8.077722872162934e-16 -0.5, -0.04900857016477944 -0.4975923633360985, -0.09754516100806324 -0.4903926402016154, -0.1451423386272302 -0.4784701678661047, -0.1913417161825439 -0.4619397662556438, -0.2356983684129978 -0.440960632174178, -0.2777851165098002 -0.4157348061512732, -0.317196642081822 -0.3865052266813691, -0.3535533905932731 -0.3535533905932745, -0.3865052266813679 -0.3171966420818234, -0.4157348061512722 -0.2777851165098018, -0.4409606321741771 -0.2356983684129995, -0.4619397662556431 -0.1913417161825456, -0.4784701678661042 -0.1451423386272318, -0.4903926402016151 -0.09754516100806473, -0.4975923633360984 -0.04900857016478086, -0.5 -5.053215498074303e-16, -0.4975923633360985 0.04900857016477985, -0.4903926402016153 0.09754516100806375, -0.4784701678661045 0.1451423386272309, -0.4619397662556435 0.1913417161825446, -0.4409606321741776 0.2356983684129986, -0.4157348061512727 0.277785116509801, -0.3865052266813686 0.3171966420818226, -0.3535533905932738 0.3535533905932737, -0.317196642081823 0.3865052266813683, -0.2777851165098015 0.4157348061512723, -0.2356983684129993 0.4409606321741772, -0.1913417161825456 0.4619397662556431, -0.1451423386272321 0.4784701678661042, -0.09754516100806521 0.490392640201615, -0.04900857016478155 0.4975923633360983, -1.424116139486239e-15 0.5, 0.04900857016477872 0.4975923633360986, 0.0975451610080624 0.4903926402016155, 0.1451423386272293 0.478470167866105, 0.1913417161825429 0.4619397662556442, 0.2356983684129968 0.4409606321741786, 0.2777851165097991 0.415734806151274, 0.3171966420818207 0.3865052266813701, 0.3535533905932718 0.3535533905932757, 0.3865052266813666 0.317196642081825, 0.4157348061512709 0.2777851165098037, 0.440960632174176 0.2356983684130017, 0.461939766255642 0.1913417161825481, 0.4784701678661034 0.1451423386272346, 0.4903926402016145 0.09754516100806784, 0.4975923633360981 0.04900857016478424, 0.5 4.119267568565299e-15, 0.5 0))
small_circle = pt.buffer(.25)
plot([circle, small_circle, pt], color_dict=None)
donut = circle.difference(small_circle)
plot(donut)
lineString1 = LineString(((-3, -5), (3, 5)))
print lineString1
plot(lineString1)
LINESTRING (-3 -5, 3 5)
How about a super crazy line? Oh yes, this is super crazy.
lineString2 = LineString(((-3, -5), (3, 5), (-3, 5), (3, -5)))
print lineString2
plot(lineString2)
LINESTRING (-3 -5, 3 5, -3 5, 3 -5)
And now we can add polygons to our linestrings to make a sad bunny.
pt1 = Point(-3, 6.5)
pt2 = Point(3, 6.5)
left = pt1.buffer(.5)
right = pt2.buffer(.5)
poly = shapely.geometry.Polygon(
((-3, 5), (3, 5), (0, 0), (-3, 5)))
plot([lineString2, left, right, poly], {"fill":"#000000", "line":"#AA8888","hole_fill":"#ffffff", "hole_line":"#999999" })
Let's look at a Polygon with holes.
polygon1 = shapely.geometry.Polygon(
((1, 1), (5, 1), (5, 5), (1, 5), (1, 1)), # Shell
[
((2, 2), (2, 3), (3, 3), (3, 2), (2, 2)), # Hole
((3, 3), (3, 4), (4, 4), (4, 3), (3, 3)), # Hole
]
)
print polygon1
plot(polygon1)
POLYGON ((1 1, 5 1, 5 5, 1 5, 1 1), (2 2, 2 3, 3 3, 3 2, 2 2), (3 3, 3 4, 4 4, 4 3, 3 3))
You can use union() to create more complex shapes.
circle = Point(5,2.5).buffer(2)
plot([circle, polygon1], color_dict=None)
Now, let's look at some more complicated shapes.
new_polygon = polygon1.union(circle)
plot(new_polygon)
print new_polygon.type
Polygon
circle2 = Point(7,7).buffer(2)
plot([circle2, polygon1], color_dict=None)
new_multipolygon = polygon1.union(circle2)
plot(new_multipolygon)
print new_multipolygon.type
print "CIRCLE AREA: %f" % (circle2.area)
print "POLYGON AREA: %f" % (polygon1.area)
print "MULTIPOLYGON AREA: %f" % (new_multipolygon.area)
MultiPolygon CIRCLE AREA: 12.546194 POLYGON AREA: 14.000000 MULTIPOLYGON AREA: 26.546194
One other way to get shape data is to get it from a file. Below is an example of a function that creates a dictionary of shapes keyed by an id column
import fiona
def get_from_fiona(filename, id_attr):
from shapely.geometry import shape
data = {}
c = fiona.open(filename, 'r')
for shape_data in c:
name = shape_data['properties'][id_attr]
parsed_shape = shape(shape_data['geometry'])
if (name in data):
print ("DUPLICATE: " + str(name))
dup_shape = data[name]
new_shape = merge_duplicates(shapely.wkt.dumps(dup_shape), shapely.wkt.dumps(parsed_shape))
parsed_shape = new_shape
data[name] = parsed_shape
#print name + " : " +str(parsed_shape)
return data
Just a quick note, you can get your own shape data from the US Census here: https://www.census.gov/cgi-bin/geo/shapefiles2013/main
We're going to look at state shapes from the US Census. Just to give you an idea of what is in the file, here is a glance at the 'properties' of a few records.
from shapely.wkt import dumps, loads
state_dict = get_from_fiona("files/tl_2013_us_state/tl_2013_us_state.shp", "NAME")
#print(shapely.wkt.dumps(state_dict["Ohio"]))
ohio = state_dict["New Jersey"]
print ohio.convex_hull
POLYGON ((-75.015123 38.788657, -75.56053799999999 39.455645, -75.563586 39.483048, -75.559737 39.630452, -75.135521 40.976865, -75.13134099999999 40.98998, -75.131332 40.989998, -75.131096 40.990464, -75.13057499999999 40.991093, -74.830057 41.2872, -74.79504 41.320407, -74.794141 41.32118699999999, -74.79310199999999 41.321846, -74.755894 41.34528, -74.75393099999999 41.346135, -74.694914 41.357423, -74.694694 41.35736, -73.893979 40.997205, -73.88506 40.452776, -73.88708199999999 40.345009, -73.89079099999999 40.31069, -73.898208 40.274353, -74.034615 39.762427, -74.038158 39.75012, -74.043863 39.736353, -74.04948899999999 39.723785, -74.083316 39.669562, -74.096531 39.649949, -74.183189 39.532994, -74.265918 39.422415, -74.28727099999999 39.398252, -74.31113499999999 39.375117, -74.733423 38.967658, -74.74310699999999 38.958885, -74.81505199999999 38.901819, -74.81930299999999 38.899009, -75.015123 38.788657))
plot(state_dict["Pennsylvania"])
print
plot([state_dict["Pennsylvania"],
state_dict["Ohio"],
state_dict["West Virginia"],
state_dict["Maryland"],
state_dict["Kentucky"],
state_dict["Indiana"],
state_dict["Michigan"]], color_dict=None)
print
You might notice that Michigan and Maryland look a little/lot wrong. This is because the USGS sometimes includes water features that are counted as part of a state's territory as part of their borders. Maryland has the Chesapeake as part of its shape and Michigan includes a great deal of the Great Lakes. Below is a more familiar picture of what Michigan looks like.
Image(filename='files/MI.png')
It's also handy to simplify shapes. This can be a net good for speed, especially where details aren't as pertinent. However, there are drawbacks in the form of loss of precision and potential introduction of unsound polygons. Shapely's simplification methodology is based on the Ramer-Douglas-Peucker algorithm. For more information see here: http://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm

"Douglas-Peucker animated" by Mysid - Own work; self-made in Inkscape and Gimp. Based on File:Douglas Peucker.png by de:User:Leupold.. Licensed under CC BY-SA 3.0 via Wikimedia Commons.
texas_simple = texas.simplify(.001)
plot(texas_simple)
print "TOTAL POINTS: %d" % (points(texas_simple))
TOTAL POINTS: 4509
texas_simpler = texas.simplify(.05)
plot(texas_simpler)
print "TOTAL POINTS: %d" % (points(texas_simpler))
TOTAL POINTS: 110
texas_simplest = texas.simplify(.1)
plot(texas_simplest)
print "TOTAL POINTS: %d" % (points(texas_simplest))
TOTAL POINTS: 47
not_texas = texas.simplify(1.1)
plot(not_texas)
print "TOTAL POINTS: %d" % (points(not_texas))
TOTAL POINTS: 11
Let's look at some further operations that can help us simplify and understand the shapes we are generating.
plot(texas.convex_hull)
plot(texas.envelope)
print(texas.bounds)
(-106.645646, 25.837163999999998, -93.508039, 36.500704)
print(texas.centroid)
center_circle1 = texas.centroid.buffer(.2)
center_circle2 = texas.centroid.buffer(.5)
center_circle3 = texas.centroid.buffer(1)
plot([texas, center_circle3, center_circle2, center_circle1, texas.centroid], color_dict=None)
POINT (-99.31713786238799 31.44721635815321)
from shapely.ops import cascaded_union, unary_union
buffered_points_list = []
x_ys = range(5);
for x in x_ys:
buffered_points_list.append(Point(x,x).buffer(1))
union_shape = cascaded_union(buffered_points_list)
plot(buffered_points_list, color_dict=None)
plot(union_shape)
Intersections are great for showing the degree of overlap as well as creating new shapes.
intersection_shape = buffered_points_list[0].intersection(buffered_points_list[1])
plot([buffered_points_list[0], buffered_points_list[1], intersection_shape], color_dict=None)
Difference is the inverse of intersection and shows the parts of a shape that do not overlap. Unlike the intersection function, this operation is not symmetrical
difference_shape1 = buffered_points_list[0].difference(buffered_points_list[1])
difference_shape2 = buffered_points_list[1].difference(buffered_points_list[0])
plot([difference_shape1,difference_shape2], color_dict=None)
Here are a few examples of shape tests just to show how subtle differences in test definitions can affect results.
print "Do the full circle shapes intersect?"
print(buffered_points_list[0].intersects(buffered_points_list[1]))
print "Do the difference shapes intersect?"
print(difference_shape1.intersects(difference_shape2))
print "Are the difference shapes overlapping?"
print(difference_shape1.overlaps(difference_shape2))
Do the full circle shapes intersect? True Do the difference shapes intersect? True Are the difference shapes overlapping? False
First, there are self-intersections
sad_polygon1 = Polygon(((-3, -5),(-3,0),(-5,0),(-3,0),(-3, 5),(3, 5), (3, -5), (-3, -5)))
plot(sad_polygon1)
print explain_validity(sad_polygon1)
Self-intersection[-5 0]
print "SAD POLYGON AREA: %f" % (sad_polygon1.area)
fixed_polygon1 = sad_polygon1.buffer(.03)
plot(fixed_polygon1)
print explain_validity(fixed_polygon1)
print "HAPPY POLYGON AREA: %f" % (fixed_polygon1.area)
SAD POLYGON AREA: 60.000000 Valid Geometry HAPPY POLYGON AREA: 61.082434
Let's look at another type of self-intersection, the bowtie. It can be more tricky than dealing with the extra line.
from shapely.validation import explain_validity
sad_polygon2 = Polygon(((-3, -5), (3, 5), (-3, 5), (3, -5), (-3, -5)))
plot(sad_polygon2)
print explain_validity(sad_polygon2)
Self-intersection[0 0]
Using the buffer method for cleanup can lead to a deletion of part of the unsound shape. Notice that the area of this shape is calculated as 0.
print sad_polygon2.area
fixed_polygon2 = sad_polygon2.buffer(0)
plot(fixed_polygon2)
print explain_validity(fixed_polygon2)
print fixed_polygon2.area
0.0 Valid Geometry 15.0
Other solutions also fail due to the fact that the shape is too unsound for unions.
from shapely.validation import explain_validity
print explain_validity(sad_polygon2)
pt = Point(0,0)
band_aid = pt.buffer(.2)
plot([sad_polygon2, band_aid], color_dict=None)
try:
trythis = sad_polygon2.union(band_aid)
except(shapely.geos.TopologicalError):
print "ERROR"
Self-intersection[0 0] ERROR
Here is a quick look at the stack trace without the exception handling for your edification.
---------------------------------------------------------------------------
TopologicalError Traceback (most recent call last)
<ipython-input-157-cfbdb28f0e7f> in <module>()
4 band_aid = pt.buffer(.2)
5 plot([sad_polygon2, band_aid])
----> 6 trythis = sad_polygon2.union(band_aid)
7
8 try:
/Users/alisonalvarez/anaconda/lib/python2.7/site-packages/shapely/geometry/base.pyc in union(self, other)
489 def union(self, other):
490 """Returns the union of the geometries (Shapely geometry)"""
--> 491 return geom_factory(self.impl['union'](self, other))
492
493 # Unary predicates
/Users/alisonalvarez/anaconda/lib/python2.7/site-packages/shapely/topology.pyc in __call__(self, this, other, *args)
45 if not this.is_valid:
46 raise TopologicalError(
---> 47 "The operation '%s' produced a null geometry. Likely cause is invalidity of the geometry %s" % (self.fn.__name__, repr(this)))
48 elif not other.is_valid:
49 raise TopologicalError(
TopologicalError: The operation 'GEOSUnion_r' produced a null geometry. Likely cause is invalidity of the geometry <shapely.geometry.polygon.Polygon object at 0x110846350>
File "<ipython-input-266-a87d6c900e35>", line 2 --------------------------------------------------------------------------- ^ SyntaxError: invalid syntax
The best way to deal with this kind of unsound shape is to not make it in the first place. In the case where this isn't really an option (such as with simplification) it's possible to clean up and simplify component shapes to prevent this. Complexity should be tamed.
Let's take a look at some unsound shapes that are a result of holes in the wrong place.
sad_polygon3 = shapely.geometry.Polygon(
((1, 1), (5, 1), (5, 5), (1, 5), (1, 1)), # Shell
[
((2, 2), (2, 3), (3, 3), (3, 2), (2, 2)), # Hole
((6, 6), (6, 7), (7, 7), (7, 6), (6, 6)), # Hole
]
)
plot(sad_polygon3)
print explain_validity(sad_polygon3)
Hole lies outside shell[6 6]
print "SAD POLYGON AREA: %f" % (sad_polygon3.area)
fixed_polygon3 = sad_polygon3.buffer(0)
plot(fixed_polygon3)
print explain_validity(fixed_polygon3)
print "FIXED POLYGON AREA: %f" % (fixed_polygon3.area)
print fixed_polygon3
SAD POLYGON AREA: 14.000000 Valid Geometry FIXED POLYGON AREA: 15.000000 POLYGON ((1 1, 1 5, 5 5, 5 1, 1 1), (2 2, 3 2, 3 3, 2 3, 2 2))
sad_polygon4 = shapely.geometry.Polygon(
((1, 1), (5, 1), (5, 5), (1, 5), (1, 1)), # Shell
[
((2, 2), (2, 4), (4, 4), (4, 2), (2, 2)), # Hole
((3, 3), (3, 3.5), (3.5, 3.5), (3.5, 3), (3, 3)), # Hole
]
)
plot(sad_polygon4)
print explain_validity(sad_polygon4)
Holes are nested[3 3]
print "SAD POLYGON AREA: %f" % (sad_polygon4.area)
fixed_polygon4 = sad_polygon4.buffer(0)
plot(fixed_polygon4)
print explain_validity(fixed_polygon4)
print "FIXED POLYGON AREA: %f" % (fixed_polygon4.area)
print fixed_polygon4
SAD POLYGON AREA: 11.750000 Valid Geometry FIXED POLYGON AREA: 12.000000 POLYGON ((1 1, 1 5, 5 5, 5 1, 1 1), (2 2, 4 2, 4 4, 2 4, 2 2))
sad_polygon5 = shapely.geometry.Polygon(
((1, 1), (5, 1), (5, 5), (1, 5), (1, 1)), # Shell
[
((2, 2), (2, 3.5), (3, 3.5), (3, 2), (2, 2)), # Hole
((3, 3), (3, 4), (4, 4), (4, 3), (3, 3)), # Hole
]
)
print explain_validity(sad_polygon5)
print sad_polygon5.is_valid
plot(sad_polygon5)
Self-intersection[3 3] False
print "SAD POLYGON AREA: %f" % (sad_polygon5.area)
fixed_polygon5 = sad_polygon5.buffer(0)
plot(fixed_polygon5)
print explain_validity(fixed_polygon5)
print "FIXED POLYGON AREA: %f" % (fixed_polygon5.area)
print fixed_polygon5
SAD POLYGON AREA: 13.500000 Valid Geometry FIXED POLYGON AREA: 13.500000 POLYGON ((1 1, 1 5, 5 5, 5 1, 1 1), (2 2, 3 2, 3 3, 4 3, 4 4, 3 4, 3 3.5, 2 3.5, 2 2))
sad_polygon6 = shapely.geometry.Polygon(
((1, 1), (5, 1), (5, 5), (1, 5), (1, 1)), # Shell
[
((2, 2), (2, 2), (3, 3), (3, 3), (2, 2)), # Hole
]
)
plot(sad_polygon6)
print explain_validity(sad_polygon6)
Too few points in geometry component[2 2]
def remove_bad_shapes(shape):
if shape.type == 'Multipolygon':
shape_list = shape.geoms
elif shape.type == 'Polygon':
shape_list = [shape]
else:
return None
polygon_list = []
for g in shape_list:
print g
print explain_validity(g)
hole_list = []
for hole in g.interiors:
hole_shape = Polygon(hole)
if "Too few points" not in explain_validity(hole_shape):
hole_list.append(hole)
else:
print "Removed hole: "
print "\t" + str(hole)
if "Too few points" not in explain_validity(Polygon(g.exterior.coords)):
print explain_validity(Polygon(g.exterior.coords))
polygon_list.append(Polygon(g.exterior.coords, hole_list))
else:
print "Removed: "
print "\t" + str(g.exterior.coords)
return MultiPolygon(polygon_list)
print "SAD POLYGON AREA: %f" % (sad_polygon6.area)
fixed_polygon6 = remove_bad_shapes(sad_polygon6)
plot(fixed_polygon6)
print explain_validity(fixed_polygon6)
print "FIXED POLYGON AREA: %f" % (fixed_polygon6.area)
SAD POLYGON AREA: 16.000000 POLYGON ((1 1, 5 1, 5 5, 1 5, 1 1), (2 2, 2 2, 3 3, 3 3, 2 2)) Too few points in geometry component[2 2] Removed hole: LINEARRING (2 2, 2 2, 3 3, 3 3, 2 2) Valid Geometry Valid Geometry FIXED POLYGON AREA: 16.000000
At Rhiza we do a lot of work generating novel shapes from many components of varying quality.
Image(filename='files/unsimplified_clayton.png')
This shape is the result of removing exterior and interior rings that are below a certain size threshold.
Image(filename='files/clayton_simple.png')
Here is another example from the union of zip codes.
Image(filename='files/separate_zips.png')
Image(filename='files/not_cleaned.png')
Image(filename='files/cleaned_up.png')